#coding=utf-8
import jieba
import jieba.analyse
import jieba.posseg
import db.mysqldb
import sys
import tag.keyword
import logging
# Python 2 only: site.py removes sys.setdefaultencoding at interpreter
# start-up, so sys is reloaded to get it back, and the codec used for implicit
# str/unicode conversion is then switched to UTF-8.
reload(sys)
sys.setdefaultencoding('utf-8')

# Root logger records DEBUG and above in error.log; filemode 'w' truncates the
# file at the start of every run.
logging.basicConfig(
    filename='error.log',
    filemode='w',
    level=logging.DEBUG,
    datefmt='%a, %d %b %Y %H:%M:%S',
    format='%(asctime)s %(filename)s[line:%(lineno)d] %(levelname)s %(message)s',
)
def find_category(keywords, lookup):
    """Return the first non-None ``lookup.get`` result for the given keywords.

    ``keywords`` is an iterable of sequences whose first element is the word
    (the shape produced by ``jieba.analyse.extract_tags(..., withWeight=True)``);
    each word is stripped of surrounding whitespace before the lookup.
    ``lookup`` is any object exposing ``get(word)``; a ``None`` result is
    treated as "no match".  Returns ``None`` when no keyword matches.
    """
    for entry in keywords:
        category = lookup.get(entry[0].strip())
        if category is not None:
            return category
    return None


if __name__ == "__main__":
    mydb = db.mysqldb.MySqlDb()
    try:
        key = tag.keyword.Keyword('./cfg.json')
        for name in ("tb_blog", "tb_zhihu", "tb_jianshu"):
            for row in mydb.getAll(name):
                # row[2] is the text that keywords are extracted from and
                # row[5] identifies the row for the update -- presumably a
                # title and a URL/id column; verify against the table schema.
                tags = jieba.analyse.extract_tags(row[2], withWeight=True)
                category = find_category(tags, key)
                if category is not None:
                    mydb.update(name, row[5], category)
                else:
                    # Lazy %-args: no TypeError if a column is not a string.
                    logging.warning('[-]%s | %s', row[5], row[2])
    finally:
        # Release the connection even when a lookup or update raises.
        mydb.close()
    # key = tag.keyword.Keyword('./cfg.json')
    # tags = jieba.analyse.extract_tags("区块链", withWeight=True)
    # for item in tags:
    #     print key.get(item[0].strip())